Face detection from training images.
# imports
# !pip install opencv-python
# !pip install scikit-image
import os
import random
import warnings
from time import time
from math import floor
from pathlib import Path
import pandas as pd, numpy as np
from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import trange, tqdm
from collections import defaultdict
import tensorflow as tf
import cv2
import skimage
from plantcv import plantcv as pcv
from skimage.io import imread, imshow
from skimage.color import rgb2gray, rgb2hsv, gray2rgb
from skimage.filters import sobel, threshold_otsu, gaussian, unsharp_mask
from skimage.feature import canny
from skimage.measure import find_contours
from skimage.morphology import binary_dilation, dilation
warnings.filterwarnings('ignore')
%matplotlib inline
# reproducibility
seed = 7
random.seed(seed)
# location of the training data on disk
data_dir = Path('./data/')
# object array: each row pairs an RGB image with its face bounding-box labels
raw_data = np.load(data_dir / 'Part 1- Train data - images.npy', allow_pickle=True) # Load Object Array
raw_data.shape # Total dataset of 409 images present
(409, 2)
raw_data[0].shape
(2,)
pprint(raw_data[0][0], compact=True)
raw_data[0][0].shape # rgb image of 333 x 650
array([[[42, 37, 34],
[56, 51, 48],
[71, 66, 63],
...,
[23, 33, 34],
[26, 36, 37],
[28, 38, 39]],
[[40, 35, 32],
[51, 46, 43],
[64, 59, 56],
...,
[27, 36, 35],
[24, 33, 32],
[26, 35, 34]],
[[43, 38, 35],
[51, 46, 43],
[61, 56, 53],
...,
[28, 30, 27],
[33, 35, 32],
[35, 37, 34]],
...,
[[56, 47, 40],
[57, 48, 41],
[61, 52, 45],
...,
[67, 48, 42],
[55, 35, 28],
[60, 40, 33]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[60, 40, 33],
[54, 34, 27]],
[[53, 44, 37],
[54, 45, 38],
[57, 48, 41],
...,
[59, 40, 34],
[70, 50, 43],
[64, 44, 37]]], dtype=uint8)
(333, 650, 3)
# each datapoint in data has image and associated label/mask coordinates
raw_data[0][1]
[{'label': ['Face'],
'notes': '',
'points': [{'x': 0.08615384615384615, 'y': 0.3063063063063063},
{'x': 0.1723076923076923, 'y': 0.45345345345345345}],
'imageWidth': 650,
'imageHeight': 333},
{'label': ['Face'],
'notes': '',
'points': [{'x': 0.583076923076923, 'y': 0.2912912912912913},
{'x': 0.6584615384615384, 'y': 0.46846846846846846}],
'imageWidth': 650,
'imageHeight': 333}]
def display(img, label, fontsize=18, cmap=None, shape=True):
    '''Show a single image with a title.

    Args:
        img: image array to display.
        label: title text.
        fontsize: title font size (bug fix: was hard-coded to 18, ignoring this argument).
        cmap: optional matplotlib colormap, e.g. 'gray' for single-channel images.
        shape: when True, also print the image's shape after showing it.
    '''
    # imshow accepts cmap=None for RGB images, so the old if/else is unnecessary
    plt.imshow(img, cmap=cmap)
    plt.title(f'{label}', fontsize=fontsize)  # bug fix: honor the fontsize parameter
    plt.axis('off')
    plt.show()
    if shape:
        print(f'Shape: {img.shape}')
# quick visual check of one training image
idx = 7
img = raw_data[idx][0]
display(img, label=f'Train Image: {idx}')
Shape: (450, 800, 3)
def plot_images(data=raw_data, num=9, img_type='original', fontsize=18,
                func=None, images=None):
    '''Plot a 3x3 grid of images with an optional filter applied.

    Args:
        data: dataset of (image, labels) pairs to sample from.
        num: number of images to sample when `images` is not given
             (bug fix: was previously ignored and hard-coded to 9;
             the 3x3 grid still shows at most 9).
        img_type: named transform: 'original', 'hsv', 'grayscale', 'lab',
                  'sobel', 'blur', 'sharp' or 'edges'.
        fontsize: subplot title font size.
        func: optional callable applied to each image; takes precedence over
              img_type and is displayed with a gray colormap.
        images: optional explicit list of images; skips random sampling.
    '''
    if images is None:
        # bug fix: honor `num` and sample across the whole dataset
        # (was random.sample(range(408), 9), which excluded the last image)
        images = [data[idx][0] for idx in random.sample(range(len(data)), num)]
    fig, axes = plt.subplots(3, 3, figsize=(17, 17))
    # bug fix: the old loop zipped against a `labels` list that was never
    # defined when `images` was passed in, raising NameError
    for ax, img in zip(axes.flatten(), images):
        ax.set_title(f'Shape: {img.shape}', fontsize=fontsize)
        if func is not None:
            img = func(img)
            ax.imshow(img, cmap='gray')
        elif img_type == 'hsv':
            img = rgb2hsv(img)  # hsv color space
            ax.imshow(img)
        elif img_type == 'grayscale':
            img = rgb2gray(img)  # to grayscale
            ax.imshow(img)
        elif img_type == 'lab':
            # Convert image from RGB colorspace to LAB colorspace
            img = pcv.rgb2gray_lab(img, 'a')
            img = unsharp_mask(img, radius=1, amount=1)
            ax.imshow(img)
        elif img_type == 'sobel':
            img = sobel(img)  # sobel filter
            ax.imshow(img)
        elif img_type == 'blur':
            img = pcv.gaussian_blur(img=img, ksize=(7, 7), sigma_x=0, sigma_y=None)
            ax.imshow(img)
        elif img_type == 'sharp':
            img = pcv.gaussian_blur(img=img, ksize=(3, 3), sigma_x=0, sigma_y=None)
            img = unsharp_mask(img, radius=1, amount=1)
            ax.imshow(img)
        elif img_type == 'edges':
            img = pcv.rgb2gray_lab(img, 'a')
            img = pcv.threshold.binary(img, 120, 255, 'dark')  # threshold
            img = pcv.fill(img, 85)  # fill noise, small objects
            img = unsharp_mask(img, radius=1, amount=1)
            # sigma applies the indicated level of gaussian smoothing before canny
            edges = canny(img, sigma=0.88)  # get edges using canny detector algorithm
            ax.imshow(edges, cmap='gray')
        else:
            ax.imshow(img)
        ax.axis('off')
    fig.tight_layout()
# preview random samples with a few of the available filters
plot_images()
plot_images(img_type='sobel')
plot_images(img_type='hsv')
plot_images(img_type='sharp')
# Required input dimensions for MobileNet
# model input size constants (MobileNet expects 224x224x3)
ALPHA = 1
IMAGE_SIZE = 224
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
from tensorflow.keras.applications.mobilenet import preprocess_input
CHANNELS = 3
# create a zero array to be filled with 1s to make the mask using the labels part of each image from dataset
masks = np.zeros((int(raw_data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH))
# preprocessed image tensor, filled in the loop below
X = np.zeros((int(raw_data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
# Build X (preprocessed images) and masks (binary face masks) from the raw data.
for index in trange(raw_data.shape[0]):
    img = raw_data[index][0]  # raw RGB image for this sample
    # resize each image to IMAGE_HEIGHT x IMAGE_WIDTH (224 x 224) to use with MobileNet
    # NOTE(review): cv2.resize's dsize is (width, height); both are 224 here so the order is immaterial
    img = cv2.resize(img, dsize=(IMAGE_HEIGHT, IMAGE_WIDTH), interpolation=cv2.INTER_CUBIC)
    try:
        img = img[:, :, :3]
    except Exception as e:
        # exclude the single image which is not rgb
        # (a 2-D grayscale image makes the channel index above raise)
        print(str(e))
        display(img, label = 'Excluded Image', cmap='gray', shape=False)
        continue
    # apply the preprocessing required for MobileNet on the img that is in correct shape
    X[index] = preprocess_input(np.array(img, dtype=np.float32))
    # get the mask coordinates and fill the portion of array in between the coords as 1
    for i in raw_data[index][1]:
        # label points are normalized to [0, 1]; scale them to pixel coordinates
        x1 = int(i['points'][0]['x'] * IMAGE_WIDTH)
        x2 = int(i['points'][1]['x'] * IMAGE_WIDTH)
        y1 = int(i['points'][0]['y'] * IMAGE_HEIGHT)
        y2 = int(i['points'][1]['y'] * IMAGE_HEIGHT)
        masks[index][y1:y2, x1:x2] = 1
65%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 265/409 [00:00<00:00, 1288.49it/s]
too many indices for array: array is 2-dimensional, but 3 were indexed
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 409/409 [00:00<00:00, 1103.87it/s]
X.shape
(409, 224, 224, 3)
masks.shape
(409, 224, 224)
from PIL import Image
def display_mask(img, mask, label, fontsize=18, alpha=0.25, binarize_mask=False):
    '''Overlay a green mask on an image and display the result.

    Args:
        img: RGB image array.
        mask: 2-D mask aligned with `img`.
        label: title text.
        fontsize: title font size (bug fix: was hard-coded to 18, ignoring this argument).
        alpha: saturation multiplier controlling how strongly the mask shows.
        binarize_mask: when True, threshold the mask with Otsu's method first.
    '''
    if binarize_mask:
        thresh = threshold_otsu(mask)
        mask = mask > thresh
    rows, cols = img.shape[0], img.shape[1]
    # put the mask in the green channel of an otherwise-black RGB image
    color_mask = np.zeros((rows, cols, 3))
    color_mask[:, :, 1] = mask
    img_hsv = skimage.color.rgb2hsv(img)
    color_mask_hsv = skimage.color.rgb2hsv(color_mask)
    # Replace the hue and saturation of the original image
    # with that of the color mask
    img_hsv[..., 0] = color_mask_hsv[..., 0]
    img_hsv[..., 1] = color_mask_hsv[..., 1] * alpha
    img_masked = skimage.color.hsv2rgb(img_hsv)
    plt.figure(figsize=(10, 10))
    plt.title(f'{label}', fontsize=fontsize)  # bug fix: honor the fontsize parameter
    plt.axis('off')
    plt.imshow(img_masked)
    plt.show()
# visual sanity check: overlay a generated mask on its training image
idx = 11
img = X[idx]
mask = masks[idx]
display_mask(img, mask, label=f'Train Image Overlayed with Mask')
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
from sklearn.model_selection import train_test_split
# hold out ~2% (9 of 409 images) as a test set; shuffle=False keeps the original order
X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.02, random_state=seed, shuffle=False)
X_train.shape, X_test.shape
((400, 224, 224, 3), (9, 224, 224, 3))
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input,
Conv2D,
UpSampling2D,
Activation,
Concatenate,
Reshape,
BatchNormalization,
SpatialDropout2D)
from tensorflow.keras.applications.mobilenet import MobileNet
# using a predefined architecture: a MobileNet model with ImageNet weights loaded,
# plus the corresponding U-Net-style upsampling layers to form the "U" model that works well for image segmentation problems
def conv_block_simple(prev_layer, filters, prefix, strides=(1, 1)):
    '''Returns a Conv2D -> BatchNorm -> ReLU block built with the Keras functional API.'''
    x = Conv2D(filters, (3, 3), padding='same', kernel_initializer='he_normal',
               strides=strides, name=prefix + '_conv')(prev_layer)
    x = BatchNormalization(name=prefix + 'BatchNormalization')(x)
    # ReLU activation closes out the block
    return Activation('relu', name=prefix + 'ActivationLayer')(x)
def create_model(trainable=True):
    '''Build a U-Net-style segmentation model on a MobileNet (ImageNet) backbone.

    Args:
        trainable: whether the backbone layers are trainable.

    Returns:
        A Keras Model mapping (IMAGE_HEIGHT, IMAGE_WIDTH, 3) images to
        (IMAGE_SIZE, IMAGE_SIZE) sigmoid masks.
    '''
    # using imagenet weights as backend and alpha=1.0
    backbone = MobileNet(input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                         include_top=False, alpha=1.0, weights="imagenet")
    for layer in backbone.layers:
        layer.trainable = trainable
    # encoder skip connections, deepest first
    skip_names = ('conv_pw_13_relu', 'conv_pw_11_relu', 'conv_pw_5_relu',
                  'conv_pw_3_relu', 'conv_pw_1_relu')
    skips = [backbone.get_layer(name).output for name in skip_names]
    # decoder: upsample, concatenate with the matching skip, then two conv blocks
    merge_targets = skips[1:] + [backbone.input]
    filter_pairs = [(256, 256), (256, 256), (192, 128), (96, 64), (48, 32)]
    x = skips[0]
    for stage, (target, (f1, f2)) in enumerate(zip(merge_targets, filter_pairs), start=6):
        x = Concatenate()([UpSampling2D()(x), target])
        x = conv_block_simple(x, f1, f'Conv_{stage}_1')
        x = conv_block_simple(x, f2, f'Conv_{stage}_2')
    x = SpatialDropout2D(0.2)(x)
    # 1x1 conv with sigmoid gives a per-pixel face probability
    x = Conv2D(1, (1, 1), activation='sigmoid')(x)
    x = Reshape((IMAGE_SIZE, IMAGE_SIZE))(x)
    return Model(inputs=backbone.input, outputs=x)
model = create_model()
from keras.utils.layer_utils import count_params
def get_params(model):
    '''Print total, trainable and non-trainable parameter counts for a Keras model.'''
    n_trainable = count_params(model.trainable_weights)
    n_frozen = count_params(model.non_trainable_weights)
    print('Total params: {:,}'.format(n_trainable + n_frozen))
    print('Trainable params: {:,}'.format(n_trainable))
    print('Non-trainable params: {:,}'.format(n_frozen))
get_params(model)
Total params: 10,283,745 Trainable params: 10,258,689 Non-trainable params: 25,056
Using TensorFlow backend.
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.backend import log, epsilon
def dice_coefficient(true, pred):
    '''Dice coefficient between ground-truth and predicted masks.'''
    intersection = 2 * tf.reduce_sum(true * pred)
    total = tf.reduce_sum(true + pred)
    # a small epsilon in the denominator avoids division by zero and NaNs while training
    return intersection / (total + epsilon())
def custom_loss(true, pred):
    '''Loss combining binary cross-entropy with a -log(dice) term.'''
    # epsilon inside the log avoids -inf / NaN when the dice coefficient is zero
    dice_term = log(dice_coefficient(true, pred) + epsilon())
    return binary_crossentropy(true, pred) - dice_term
# !pip install livelossplot
from tensorflow.python.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Activation, BatchNormalization, Dropout
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.python.keras.callbacks import LambdaCallback, EarlyStopping, ReduceLROnPlateau
# Define Callbacks and a few helper functions
# To simplify the log
# prints one compact line per epoch instead of the default Keras progress bar
simple_log = LambdaCallback(
    on_epoch_end = lambda e, l: print(f" ~| Epoch: {e+1} | Validation Loss: {l['val_loss']:.5f} | Dice Coefficient: {l['val_dice_coefficient']:.5f} | ", end =" >|> \n" ))
# stop after 10 epochs without val_loss improvement, restoring the best weights
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0,
                           patience=10,
                           verbose=1,
                           restore_best_weights=True)
# learning rate reduction: multiply LR by 0.4 after 4 stagnant epochs, floored at 1e-5
lr_reduce_on_plateau = ReduceLROnPlateau(monitor='val_loss',
                                         patience=4,
                                         verbose=1,
                                         factor=0.4,
                                         min_lr=0.00001)
def plot_learning_curve(hist, metric='loss'):
    '''Plot training vs. validation curves for `metric` from a Keras History.

    Args:
        hist: Keras History object returned by model.fit.
        metric: key into hist.history; the matching 'val_<metric>' is plotted too.
    '''
    sns.set()
    plt.figure(figsize=(5, 4))
    train = hist.history[metric]
    val = hist.history[f'val_{metric}']
    epochs_run = list(range(1, len(train) + 1))
    # bug fix: pass data via x=/y= keywords — positional data arguments to
    # sns.lineplot were deprecated in seaborn 0.11 and removed in 0.12
    sns.lineplot(x=epochs_run, y=train, marker='o', color='coral', label='Training')
    sns.lineplot(x=epochs_run, y=val, marker='>', color='green', label='Validation')
    plt.title(f"{metric.capitalize()} vs. Epochs", fontsize=20)
    plt.legend()
    plt.show()
# compile model with the custom dice-based loss and metric
model.compile('Adam', loss=custom_loss, metrics=[dice_coefficient])
epochs = 300
batch_size = 32
# train on GPU; early stopping will normally end training well before 300 epochs
with tf.device("gpu:0"):
    h1 = model.fit(X_train, y_train,
                   epochs=epochs,
                   batch_size=batch_size,
                   validation_split=0.14,
                   callbacks = [early_stop, simple_log, lr_reduce_on_plateau],
                   verbose = False)
~| Epoch: 1 | Validation Loss: 14.85979 | Dice Coefficient: 0.21708 | >|> ~| Epoch: 2 | Validation Loss: 14.85978 | Dice Coefficient: 0.21708 | >|> ~| Epoch: 3 | Validation Loss: 14.85952 | Dice Coefficient: 0.21708 | >|> ~| Epoch: 4 | Validation Loss: 14.85745 | Dice Coefficient: 0.21708 | >|> ~| Epoch: 5 | Validation Loss: 14.82395 | Dice Coefficient: 0.21711 | >|> ~| Epoch: 6 | Validation Loss: 14.52242 | Dice Coefficient: 0.21783 | >|> ~| Epoch: 7 | Validation Loss: 12.91274 | Dice Coefficient: 0.22335 | >|> ~| Epoch: 8 | Validation Loss: 11.27742 | Dice Coefficient: 0.23083 | >|> ~| Epoch: 9 | Validation Loss: 7.55217 | Dice Coefficient: 0.25888 | >|> ~| Epoch: 10 | Validation Loss: 3.21503 | Dice Coefficient: 0.35395 | >|> ~| Epoch: 11 | Validation Loss: 2.29382 | Dice Coefficient: 0.40668 | >|> ~| Epoch: 12 | Validation Loss: 1.76228 | Dice Coefficient: 0.53190 | >|> ~| Epoch: 13 | Validation Loss: 2.37711 | Dice Coefficient: 0.46654 | >|> ~| Epoch: 14 | Validation Loss: 1.31083 | Dice Coefficient: 0.56680 | >|> ~| Epoch: 15 | Validation Loss: 1.00203 | Dice Coefficient: 0.62120 | >|> ~| Epoch: 16 | Validation Loss: 0.89691 | Dice Coefficient: 0.62448 | >|> ~| Epoch: 17 | Validation Loss: 0.90492 | Dice Coefficient: 0.61931 | >|> ~| Epoch: 18 | Validation Loss: 0.84834 | Dice Coefficient: 0.62418 | >|> ~| Epoch: 19 | Validation Loss: 0.77228 | Dice Coefficient: 0.66225 | >|> ~| Epoch: 20 | Validation Loss: 0.89034 | Dice Coefficient: 0.64864 | >|> ~| Epoch: 21 | Validation Loss: 0.73284 | Dice Coefficient: 0.65599 | >|> ~| Epoch: 22 | Validation Loss: 0.74267 | Dice Coefficient: 0.67873 | >|> ~| Epoch: 23 | Validation Loss: 0.71065 | Dice Coefficient: 0.66984 | >|> ~| Epoch: 24 | Validation Loss: 0.73286 | Dice Coefficient: 0.69672 | >|> ~| Epoch: 25 | Validation Loss: 0.78827 | Dice Coefficient: 0.68523 | >|> ~| Epoch: 26 | Validation Loss: 0.79775 | Dice Coefficient: 0.69009 | >|> ~| Epoch: 27 | Validation Loss: 0.74278 | Dice Coefficient: 0.69819 | >|> 
Epoch 00027: ReduceLROnPlateau reducing learning rate to 0.0004000000189989805. ~| Epoch: 28 | Validation Loss: 0.71955 | Dice Coefficient: 0.69033 | >|> ~| Epoch: 29 | Validation Loss: 0.70608 | Dice Coefficient: 0.69818 | >|> ~| Epoch: 30 | Validation Loss: 0.72440 | Dice Coefficient: 0.68061 | >|> ~| Epoch: 31 | Validation Loss: 0.70435 | Dice Coefficient: 0.68107 | >|> ~| Epoch: 32 | Validation Loss: 0.70101 | Dice Coefficient: 0.67767 | >|> ~| Epoch: 33 | Validation Loss: 0.68917 | Dice Coefficient: 0.69228 | >|> ~| Epoch: 34 | Validation Loss: 0.70251 | Dice Coefficient: 0.67801 | >|> ~| Epoch: 35 | Validation Loss: 0.68847 | Dice Coefficient: 0.68584 | >|> ~| Epoch: 36 | Validation Loss: 0.68071 | Dice Coefficient: 0.68515 | >|> ~| Epoch: 37 | Validation Loss: 0.66019 | Dice Coefficient: 0.69386 | >|> ~| Epoch: 38 | Validation Loss: 0.70986 | Dice Coefficient: 0.67447 | >|> ~| Epoch: 39 | Validation Loss: 0.69470 | Dice Coefficient: 0.68071 | >|> ~| Epoch: 40 | Validation Loss: 0.69702 | Dice Coefficient: 0.67733 | >|> ~| Epoch: 41 | Validation Loss: 0.69001 | Dice Coefficient: 0.68248 | >|> Epoch 00041: ReduceLROnPlateau reducing learning rate to 0.00016000000759959222. ~| Epoch: 42 | Validation Loss: 0.69772 | Dice Coefficient: 0.67821 | >|> ~| Epoch: 43 | Validation Loss: 0.70048 | Dice Coefficient: 0.67635 | >|> ~| Epoch: 44 | Validation Loss: 0.73087 | Dice Coefficient: 0.66161 | >|> ~| Epoch: 45 | Validation Loss: 0.70977 | Dice Coefficient: 0.67043 | >|> Epoch 00045: ReduceLROnPlateau reducing learning rate to 6.40000042039901e-05. ~| Epoch: 46 | Validation Loss: 0.69623 | Dice Coefficient: 0.67934 | >|> Restoring model weights from the end of the best epoch. ~| Epoch: 47 | Validation Loss: 0.70125 | Dice Coefficient: 0.67878 | >|> Epoch 00047: early stopping
plot_learning_curve(h1, metric='loss')
def scores_nn(model):
    '''Evaluate `model` on the global train/test splits and print loss and dice.'''
    train_loss, train_dice = model.evaluate(X_train, y_train, verbose=False)
    test_loss, test_dice = model.evaluate(X_test, y_test, verbose=False)
    print(f"Train: Loss: {train_loss:.3f}, Dice Coefficient: {train_dice:.3f}")
    print(f"Test: Loss: {test_loss:.3f}, Dice Coefficient: {test_dice:.3f}")
scores_nn(model)
Train: Loss: 0.169, Dice Coefficient: 0.895 Test: Loss: 0.491, Dice Coefficient: 0.759
best_model = model
filename = 'Face_Detector'
# NOTE(review): `filename` is defined but never used; the save path is a literal — confirm intended path
best_model.save(f"(unknown).h5")
# some time later...
# load the model from disk
from tensorflow.keras.models import load_model
# load model (compile=False because the custom loss/metric are re-attached below)
face_detector = load_model(f"(unknown).h5", compile=False)
get_params(face_detector)
# compile model with the same custom loss and metric used for training
face_detector.compile('Adam', loss=custom_loss, metrics=[dice_coefficient])
# Hence, this model can be loaded and re-used anywhere with similar datasets
Total params: 10,283,745 Trainable params: 10,258,689 Non-trainable params: 25,056
# load the held-out prediction image from disk
img_path = Path('./data/Part 1 - Test Data - Prediction Image.jpeg')
test_img = imread(img_path)
display(test_img, label='Test Image')
Shape: (500, 500, 3)
# predict
def detect_faces(image, model=face_detector):
plt.figure(figsize=(10, 10))
display(image, label='Test Image')
# reshape to IMAGE_HEIGHT x IMAGE_WIDTH
image_reshaped = cv2.resize(image, dsize=(IMAGE_HEIGHT, IMAGE_WIDTH), interpolation=cv2.INTER_CUBIC)
plt.figure(figsize=(10, 10))
display(image_reshaped, label='Reshaped')
# preprocess
image_preprocessed = preprocess_input(np.array(image_reshaped, dtype=np.float32))
plt.figure(figsize=(10, 10))
display(image_preprocessed, label='Preprocessed')
image_preprocessed
# reshape the input tensor to expected shape=(1, 224, 224, 3) to make it compatible with mdoel format
model_input = image_preprocessed.reshape(
[1, image_preprocessed.shape[0], image_preprocessed.shape[1], image_preprocessed.shape[2]])
pred_mask = model.predict(model_input, verbose=False)
pred_mask = np.squeeze(pred_mask)
# pred_mask = cv2.resize(1.0*(pred_mask > 0.1), (IMAGE_WIDTH, IMAGE_HEIGHT))
plt.figure(figsize=(10, 10))
display(pred_mask, label=f'The Mask')
display_mask(image_preprocessed, pred_mask, label=f'Test Image Overlayed with Mask', binarize_mask=True)
# run the full detection pipeline on the held-out prediction image
detect_faces(test_img)
Shape: (500, 500, 3)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Shape: (224, 224, 3)
Shape: (224, 224, 3)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Shape: (224, 224)